home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga Format CD 52
/
Amiga Format AFCD52 (Issue 136, May 2000).iso
/
-serious-
/
programming
/
other
/
jikes-1.11
/
src
/
stream.h
< prev
next >
Wrap
C/C++ Source or Header
|
2000-02-23
|
12KB
|
409 lines
// $Id: stream.h,v 1.15 2000/01/07 00:25:53 lord Exp $
//
// This software is subject to the terms of the IBM Jikes Compiler
// License Agreement available at the following URL:
// http://www.ibm.com/research/jikes.
// Copyright (C) 1996, 1998, International Business Machines Corporation
// and others. All Rights Reserved.
// You must accept the terms of that agreement to use this software.
//
#ifndef stream_INCLUDED
#define stream_INCLUDED
#include "config.h"
#include <sys/stat.h>
#include <limits.h>
#include <iostream.h>
#include <stddef.h>
#include <stdio.h>
#include "javadef.h"
#include "javasym.h"
#include "tuple.h"
#include "tab.h"
#include "lookup.h"
class Control;
class Input_info;
class Scanner;
class Symbol;
class FileSymbol;
class ZipFile;
//
// StreamError is a plain record describing one lexical problem: the kind of
// problem plus a start/end location pair. LexStream keeps a Tuple of these
// (bad_tokens). The class deliberately has no constructor; a record is filled
// in after creation by calling Initialize.
//
class StreamError
{
public:

    //
    // The categories of lexical error that can be recorded.
    //
    enum StreamErrorKind
    {
        BAD_TOKEN,
        BAD_OCTAL_CONSTANT,
        EMPTY_CHARACTER_CONSTANT,
        UNTERMINATED_CHARACTER_CONSTANT,
        UNTERMINATED_COMMENT,
        UNTERMINATED_STRING_CONSTANT,
        INVALID_HEX_CONSTANT,
        INVALID_FLOATING_CONSTANT_EXPONENT,
        INVALID_UNICODE_ESCAPE
    };

    unsigned start_location; // where the offending text begins
    unsigned end_location;   // where the offending text ends
    StreamErrorKind kind;    // which category of error this is

    //
    // Fill in all three fields of this record in one call.
    //
    void Initialize(StreamErrorKind kind_, unsigned start_location_, unsigned end_location_)
    {
        this -> start_location = start_location_;
        this -> end_location = end_location_;
        this -> kind = kind_;
    }
};
//
// LexStream holds a stream of tokens generated from an input and
// provides methods to retrieve information from the stream.
//
// Besides the tokens it also keeps the comments found in the input, the
// starting location of every line, the list of recorded lexical errors
// (bad_tokens) and, while it is loaded, the wide-character input buffer.
// Class Scanner is a friend and is presumably the component that fills
// these tables in.
//
class LexStream
{
public:

    typedef int TypeIndex;
    typedef int TokenIndex;
    typedef int CommentIndex;
    enum { LEX_INFINITY = INT_MAX }; // the largest possible value for TokenIndex

    FileSymbol *file_symbol;

    //
    // Cursor arithmetic. Next and Previous clamp their result to the range
    // [0 .. token_stream.Length() - 1] instead of running off either end.
    //
    inline TokenIndex Next(TokenIndex i)
         { return (++i < token_stream.Length() ? i : token_stream.Length() - 1); }
    inline TokenIndex Previous(TokenIndex i) { return (i <= 0 ? 0 : i - 1); }

    //
    // Peek reports the token that follows the cursor without moving it.
    //
    inline TokenIndex Peek() { return Next(index); }

    //
    // Reset positions the cursor just before token i, so that the next call
    // to Gettoken() yields i.
    //
    inline void Reset(TokenIndex i = 1) { index = Previous(i); }

    //
    // Gettoken advances the cursor and returns its new position. The bounded
    // form jumps straight to the last token of the stream once the cursor
    // has reached end_token.
    //
    inline TokenIndex Gettoken() { return index = Next(index); }
    inline TokenIndex Gettoken(TokenIndex end_token)
         { return index = (index < end_token ? Next(index) : token_stream.Length() - 1); }

    //
    // Index 0 is returned as the "bad token" index.
    //
    inline TokenIndex Badtoken() { return 0; }

    //
    // Per-token accessors. All of them index the "tokens" array directly
    // and perform no bounds checking.
    //
    inline unsigned Kind(TokenIndex i) { return tokens[i].Kind(); }
    inline unsigned Location(TokenIndex i) { return tokens[i].Location(); }
    inline unsigned Line(TokenIndex i) { return FindLine(tokens[i].Location()); }

    //
    // Column prefers the precomputed "columns" table (see ComputeColumns).
    // Without it the column is derived from the input buffer if that is
    // currently loaded; otherwise 0 is returned.
    //
    inline unsigned Column(TokenIndex i)
    {
        return columns ? columns[i]
                       : (input_buffer ? FindColumn(tokens[i].Location()) : 0);
    }

    //
    // True when token i is the first token on its line, i.e. there is no
    // preceding token or the preceding token lies on an earlier line.
    //
    inline bool AfterEol(TokenIndex i) { return (i < 1 ? true : Line(i - 1) < Line(i)); }
    inline bool IsDeprecated(TokenIndex i) { return tokens[i].Deprecated(); }
    inline TokenIndex MatchingBrace(TokenIndex i) { return tokens[i].additional_info.right_brace; }

    //
    // Spelling of token i: the name of its symbol when the token has a name
    // or literal symbol attached, otherwise the keyword name for its kind.
    //
    wchar_t *NameString(TokenIndex i)
    {
        return (NameSymbol(i) || LiteralSymbol(i) ? tokens[i].additional_info.symbol -> Name()
                                                  : KeywordName(tokens[i].Kind()));
    }

    //
    // Length of the spelling returned by NameString(i).
    //
    int NameStringLength(TokenIndex i)
    {
        return (NameSymbol(i) || LiteralSymbol(i) ? tokens[i].additional_info.symbol -> NameLength()
                                                  : wcslen(KeywordName(tokens[i].Kind())));
    }

    //
    // The elaborated "class" specifier is required here because each of
    // these member functions has the same name as the type it returns.
    //
    class LiteralSymbol *LiteralSymbol(TokenIndex);
    class NameSymbol *NameSymbol(TokenIndex);

    char *FileName();
    size_t FileNameLength();

    //
    // Line geometry, computed from the "locations" table, which holds the
    // starting location of each line. Entry line_no + 1 must exist.
    //
    inline int LineLength(unsigned line_no) { return locations[line_no + 1] - locations[line_no]; }
    inline int LineStart(unsigned line_no) { return locations[line_no]; }
    inline int LineEnd(unsigned line_no) { return locations[line_no + 1] - 1; }

    //
    // Length, as measured by Tab::Wcslen, of the input from the start of
    // token i to the end of its line. Requires the input buffer to be loaded.
    //
    inline int LineSegmentLength(TokenIndex i)
        { return Tab::Wcslen(input_buffer, tokens[i].Location(), LineEnd(Line(i))); }

    //
    // For a sequence of tokens in a given range find out how many large
    // characters they contain and compute the appropriate offset.
    // Every character above 0xff adds 5 to the offset (presumably because
    // it is displayed as a six-character \uXXXX escape - TODO confirm).
    //
    inline int WcharOffset(TokenIndex start, TokenIndex end)
    {
        int offset = 0;
        for (TokenIndex i = start; i <= end; i++)
        {
            for (wchar_t *str = NameString(i); *str; str++)
            {
                 if (*str > 0xff)
                    offset += 5;
            }
        }

        return offset;
    }

    //
    // When only an end token is supplied, the start token is assumed to be the first one on the same line.
    //
    inline int WcharOffset(TokenIndex end)
    {
        TokenIndex start;
        unsigned the_line = Line(end);

        //
        // Walk backwards to the first token that is not on the_line. The
        // lower bound keeps the scan from evaluating Line(-1), which would
        // read tokens[-1], when every token back to index 0 is on the_line.
        //
        for (start = end; start >= 0 && Line(start) == the_line; start--)
            ;
        start++;

        return WcharOffset(start, end);
    }

    wchar_t *InputBuffer() { return input_buffer; }
    size_t InputBufferLength() { return input_buffer_length; }

    CommentIndex FirstComment(TokenIndex);

    //
    // Sizes of, and element access to, the type, token, comment and
    // bad-token tables.
    //
    inline int NumTypes() { return type_index.Length(); }
    inline TokenIndex Type(int i) { return types[i]; }

    inline int NumTokens() { return token_stream.Length(); }

    inline int NumComments() { return comment_stream.Length(); }
    inline TokenIndex PrecedingToken(CommentIndex i) { return comments[i].previous_token; }
    inline unsigned CommentLocation(CommentIndex i) { return comments[i].location; }

    //
    // CommentString is only meaningful after SetUpComments() has stored the
    // comment text; it stays valid until DestroyInput() frees that storage.
    //
    inline wchar_t *CommentString(CommentIndex i) { return comments[i].string; }
    inline int CommentStringLength(CommentIndex i) { return comments[i].length; }

    inline int NumBadTokens() { return bad_tokens.Length(); }

#ifdef TEST
    int file_read;
#endif

    //*
    //* Constructors and Destructor.
    //*
    // The initializers are listed in member declaration order. "index" and
    // "types" are given definite initial values so that no member is ever
    // read while indeterminate; bad_tokens and type_index are
    // default-constructed.
    //
    LexStream(Control &control_, FileSymbol *file_symbol_) : file_symbol(file_symbol_),
#ifdef TEST
                                                             file_read(0),
#endif
                                                             index(0),
                                                             tokens(NULL),
                                                             columns(NULL),
                                                             token_stream(12, 16),
                                                             comments(NULL),
                                                             comment_stream(10, 8),
                                                             locations(NULL),
                                                             line_location(12, 8),
                                                             types(NULL),
                                                             initial_reading_of_input(true),
                                                             input_buffer(NULL),
                                                             input_buffer_length(0),
                                                             comment_buffer(NULL),
                                                             control(control_)
    {}

    //
    // Reload the input, build the column table from it if the input could
    // be loaded, then release the input again. Returns true if a column
    // table exists afterwards.
    //
    bool ComputeColumns()
    {
        RereadInput();
        if (input_buffer)
            InitializeColumns();

        DestroyInput();

        return (columns != NULL);
    }

    void RereadInput();
    ~LexStream();

    //
    // Release the input buffer and the comment text buffer. Note that this
    // leaves any "string" pointers stored in the comments dangling.
    //
    void DestroyInput()
    {
        delete [] input_buffer;
        input_buffer = NULL;

        delete [] comment_buffer;
        comment_buffer = NULL;
    }

    void SortMessages();
    void PrintMessages();
    void PrintEmacsMessage(int);
    void PrintSmallSource(int);
    void PrintLargeSource(int);
    void PrintMessage(StreamError::StreamErrorKind);

    //
    // Copy the text of every comment (indexes 1 and up) out of the input
    // buffer into one contiguous buffer, each copy followed by U_NULL, and
    // point each comment's "string" field at its copy.
    //
    void SetUpComments()
    {
        //
        // The comment text is already saved. Allocating again would leak
        // the existing buffer, so keep what we have.
        //
        if (comment_buffer)
            return;

        RereadInput();

        //
        // As in ComputeColumns, the input may not be available after the
        // reread; in that case there is nothing to copy from.
        //
        if (! input_buffer)
            return;

        //
        // Calculate the length of the string required to save the comments.
        // Allocate the buffer, save the comments in the buffer and update their
        // respective "string" pointer.
        //
        int length = 0,
            i;

        for (i = 1; i < comment_stream.Length(); i++)
            length += (comments[i].length + 1); // + 1 for the terminator
        comment_buffer = new wchar_t[length];
        wchar_t *ptr = comment_buffer;
        for (i = 1; i < comment_stream.Length(); i++)
        {
            memmove(ptr, &(input_buffer[comments[i].location]), comments[i].length * sizeof(wchar_t));
            comments[i].string = ptr;
            ptr += comments[i].length;
            *ptr++ = U_NULL;
        }

        return;
    }

#ifdef TEST
    //
    // Declared without the "LexStream::" prefix: a qualified name is not
    // permitted on a member declaration inside its own class.
    //
    void Dump(); // temporary function used to dump token stream.
#endif

    //
    // Return the total size of space allocated for the tokens.
    //
    size_t TokenSpaceAllocated(void)
    {
        return token_stream.Length() * sizeof(Token);
    }

    //
    // Return the total size of space allocated for the comments.
    //
    size_t CommentSpaceAllocated(void)
    {
        return comment_stream.Length() * sizeof(Comment);
    }

private:

    int hexvalue(wchar_t ch);

    //
    // States of the lexer that processes the raw input; judging by the
    // names they track carriage returns, quoting and \uXXXX escapes.
    //
    enum UnicodeLexerState
    {
        RAW,
        CR,
        QUOTE,
        UNICODE_ESCAPE,
        UNICODE_ESCAPE_DIGIT_0,
        UNICODE_ESCAPE_DIGIT_1,
        UNICODE_ESCAPE_DIGIT_2
    };

    friend class Scanner;

    //
    // A Token packs three fields into the single word "info":
    //
    //     bits 0..6   the token kind          (mask 0x0000007F)
    //     bit  7      the "deprecated" flag   (mask 0x00000080)
    //     bits 8..31  the location            (at most 0x00FFFFFF)
    //
    // "additional_info" holds either the symbol attached to the token or
    // the index of its matching right brace.
    //
    class Token
    {
        //
        // It is expected that a location will be set for every token. Therefore,
        // as we are setting the location, we also reset the deprecated bit to 0.
        // If it is subsequently discovered that the token is followed by one or more
        // deprecated tags then the bit is set to 1 by an invocation of the
        // function SetDeprecated. Note that a better way to resetting all the bits in
        // "info" is to use the function ResetInfoAndSetLocation defined below, instead
        // of using SetLocation
        //
        // NOTE(review): this function is declared before "public:" and is
        // therefore private to Token.
        //
        inline void SetLocation(unsigned location) { assert(location <= 0x00FFFFFF); info = (info & 0x0000007F) | (location << 8); }

    public:
        unsigned info;
        union
        {
            Symbol   *symbol;
            TokenIndex right_brace;
        } additional_info;

        //
        // To just reset the info, this function should be invoked with a location value of 0.
        //
        inline void ResetInfoAndSetLocation(unsigned location)
        {
            assert(location <= 0x00FFFFFF);
            info = (location << 8);
            additional_info.symbol = NULL;
        }
        inline unsigned Location() { return (info >> 8); }
        inline void SetKind(unsigned kind) { assert(kind <= 0x0000007F); info = (info & 0xFFFFFF80) | kind; }
        inline unsigned Kind() { return (info & 0x0000007F); }
        inline void SetDeprecated() { info |= 0x00000080; }
        inline bool Deprecated() { return ((info & 0x00000080) != 0); }

        inline void SetSymbol(Symbol *symbol) { additional_info.symbol = symbol; }
        inline void SetRightBrace(TokenIndex rbrace) { additional_info.right_brace = rbrace; }
    };

    //
    // Obtain the index of a fresh token from token_stream and clear it,
    // giving it the supplied location (0 by default).
    //
    TokenIndex GetNextToken(unsigned location = 0)
    {
        TokenIndex index = token_stream.NextIndex();
        token_stream[index].ResetInfoAndSetLocation(location);

        return index;
    }

    //
    // A Comment records the token it follows, where it sits in the input
    // and how long it is. "string" is filled in by SetUpComments.
    //
    class Comment
    {
    public:
        TokenIndex previous_token;
        unsigned   location;
        unsigned   length;
        wchar_t   *string;
    };

    Tuple<StreamError> bad_tokens;

    TokenIndex index;                           // the cursor moved by Gettoken and Reset
    Token *tokens;                              // array read by the per-token accessors
    unsigned short *columns;                    // optional per-token columns; NULL when absent
    ConvertibleArray<Token> token_stream;
    Comment *comments;                          // array read by the comment accessors
    ConvertibleArray<Comment> comment_stream;
    unsigned *locations;                        // starting location of each line
    ConvertibleArray<unsigned> line_location;
    TokenIndex *types;                          // array read by Type()
    ConvertibleArray<TokenIndex> type_index;

    void InitializeColumns();
    void CompressSpace();

    bool initial_reading_of_input;

    wchar_t *input_buffer;                      // owned; released by DestroyInput
    size_t input_buffer_length;
    wchar_t *comment_buffer;                    // owned; released by DestroyInput

    Control &control;

    void ReadInput();
    void ProcessInput(char *, long);
    void ProcessInputAscii(char *, long);
#ifdef HAVE_LIB_ICU_UC
    void ProcessInputUnicode(char *, long);
#endif

    wchar_t *KeywordName(int);

    unsigned FindLine(unsigned location);

    //
    // Column of a location, measured by Tab::Wcslen from the start of the
    // line containing it. Requires the line table and the input buffer.
    //
    unsigned FindColumn(unsigned location)
    {
        assert(locations);

        return Tab::Wcslen(input_buffer, locations[FindLine(location)], location);
    }
};
#endif